#install.packages("Rcpp")
library(Rcpp)
# Compile the GCD / LCM helpers with Rcpp.
#
# sourceCpp() is used instead of cppFunction(): cppFunction() defines exactly
# one R function per call, so a snippet holding two C++ functions could only
# expose one of them to R. Here each helper carries its own export attribute,
# which makes both gcd_r() and lcm_r() callable from R.
sourceCpp(code = '
#include <Rcpp.h>
#include <cstdlib>
#include <numeric>

// std::gcd is a C++17 library feature.
// [[Rcpp::plugins(cpp17)]]

// Greatest common divisor of |a| and |b|; gcd_r(0, 0) is 0.
// [[Rcpp::export]]
int gcd_r(int a, int b) {
  return std::gcd(std::abs(a), std::abs(b));
}

// Least common multiple of |a| and |b|. Defined as 0 when either input is 0,
// which also avoids dividing by gcd(0, 0) == 0. Dividing before multiplying
// keeps the intermediate value as small as possible.
// [[Rcpp::export]]
int lcm_r(int a, int b) {
  a = std::abs(a);
  b = std::abs(b);
  if (a == 0 || b == 0) return 0;
  return a / std::gcd(a, b) * b;
}
')
# 506 hw5
repo link: https://github.com/gracejry/Stats506-_FA24.git
Problem 1
Create a class to represent rational numbers (numbers of the form a/b for integers a and b). Do this using S4.
For the `rational` class, define the following:
1. A constructor.
2. A validator that ensures the denominator is non-zero.
3. A `show` method.
4. A `simplify` method, to obtain the simplest form (e.g. `simplify(2/4)` produces `1/2`).
5. A `quotient` method (e.g. `quotient(3/7)` produces `.42857143...`). It should support a `digits` argument but only in the printing, not the returned result (Hint: what does `print` return?).
6. Addition, subtraction, multiplication, division. These should all return a `rational`.
7. You’ll (probably) need GCD and LCM as part of some of these calculations; include these functions using Rcpp. Even if you don’t need these functions for another calculation, include them.
#' Rational Number Class
#'
#' S4 class representing a rational number (a fraction) as a numerator and a
#' denominator.
#'
#' @slot numerator Numeric. The numerator of the fraction.
#' @slot denominator Numeric. The denominator of the fraction (non-zero).
#'
#' @section Validation:
#' The validity function returns `TRUE` for a valid object and otherwise a
#' character vector listing every problem found. Returning the messages
#' (rather than calling `stop()`) is the S4 convention and lets
#' `validObject(x, test = TRUE)` report problems without raising an error.
#' - Numerator and denominator must each be a single, finite, whole number.
#' - The denominator cannot be zero.
#'
#' The slot types are enforced by S4 itself, so the validity function does not
#' re-check that the slots are numeric.
#'
# Define the "rational" S4 class
setClass(
  "rational",
  slots = list(
    numerator = "numeric",   # The numerator of the fraction
    denominator = "numeric"  # The denominator of the fraction
  ),
  # Validation function
  validity = function(object) {
    problems <- character()

    # Each slot must hold exactly one finite whole number. Checking the length
    # first keeps the comparisons below scalar, so they cannot fail on
    # zero-length or multi-element slots.
    for (part in c("numerator", "denominator")) {
      value <- slot(object, part)
      if (length(value) != 1L) {
        problems <- c(problems, paste("The", part, "must be a single number."))
      } else if (!is.finite(value) || value != trunc(value)) {
        problems <- c(
          problems,
          paste("The", part, "must be a finite whole number.")
        )
      }
    }

    den <- object@denominator
    if (length(den) == 1L && !is.na(den) && den == 0) {
      problems <- c(problems, "Error: Denominator cannot be zero.")
    }

    if (length(problems) == 0L) TRUE else problems
  }
)
#' Create a Rational Number
#'
#' A constructor function to create objects of the "rational" S4 class.
#' A non-zero fraction is reduced to lowest terms using the greatest common
#' divisor (GCD), and the sign is always carried by the numerator. A zero
#' numerator is stored with the denominator as given (after sign
#' normalisation), without reduction.
#'
#' @param numerator A single whole number. The numerator of the fraction.
#' @param denominator A single whole number. The denominator of the fraction.
#'   Must not be zero.
#'
#' @return An object of class "rational".
#'
#' @details
#' Input is checked up front so that malformed arguments fail with a clear
#' message instead of an error raised from inside a comparison or from the
#' compiled GCD helper:
#' - `NULL` / zero-length or `NA` input: "Missing numerator or denominator."
#' - non-numeric input: "Both numerator and denominator must be numeric."
#' - vectors of length > 1, or non-whole / infinite values: rejected, because
#'   `gcd_r()` works on C++ `int` arguments and a fractional value would
#'   otherwise be truncated silently.
#'
createRational <- function(numerator, denominator) {
  # `denominator` is inspected first, so calling with missing arguments still
  # reports the missing denominator.
  if (length(denominator) == 0L || length(numerator) == 0L) {
    stop("Error: Missing numerator or denominator.")
  }
  if (!is.numeric(numerator) || !is.numeric(denominator)) {
    stop("Error: Both numerator and denominator must be numeric.")
  }
  if (length(numerator) != 1L || length(denominator) != 1L) {
    stop("Error: Numerator and denominator must each be a single number.")
  }
  if (is.na(numerator) || is.na(denominator)) {
    stop("Error: Missing numerator or denominator.")
  }
  if (!is.finite(numerator) || !is.finite(denominator) ||
      numerator != trunc(numerator) || denominator != trunc(denominator)) {
    stop("Error: Numerator and denominator must be whole numbers.")
  }

  # Check for zero denominator
  if (denominator == 0) stop("Error: Denominator cannot be zero.")

  # Ensure denominator is positive
  if (denominator < 0) {
    numerator <- -numerator
    denominator <- -denominator
  }

  # Skip simplification for zero numerator
  if (numerator == 0) {
    return(new("rational", numerator = numerator, denominator = denominator))
  }

  # Simplify the fraction
  divisor <- gcd_r(numerator, denominator)
  new(
    "rational",
    numerator = numerator / divisor,
    denominator = denominator / divisor
  )
}
#' Display a Rational Number
#'
#' A method to display an object of the "rational" class in a human-readable format.
#'
#' @param object An object of class "rational".
#'
#' @details
#' The value is printed as "numerator / denominator" followed by a newline.
#' A zero numerator needs no special case: it is printed as "0 / denominator".
#'
# Define the show method for "rational" class
setMethod(
  "show",
  "rational",
  function(object) {
    # cat() renders a zero numerator as "0", so a dedicated zero branch would
    # emit exactly the same text as this single call.
    cat(object@numerator, "/", object@denominator, "\n")
  }
)
#' Simplify a Rational Number
#'
#' Reduces an object of the "rational" class to its lowest terms.
#'
#' @param r An object of class "rational".
#'
#' @return A new "rational" object, built by `createRational()` from the
#'   numerator and denominator of `r` after both are divided by their GCD.
#'
#' @details
#' The greatest common divisor is obtained from `gcd_r()`; both parts of the
#' fraction are divided by it before the result is constructed.
#'
# Function to simplify a rational number
simplify <- function(r) {
  common <- gcd_r(r@numerator, r@denominator)
  reduced_numerator <- r@numerator / common
  reduced_denominator <- r@denominator / common
  createRational(reduced_numerator, reduced_denominator)
}
#' Arithmetic Operations for Rational Numbers
#'
#' Define addition, subtraction, multiplication, and division operations for objects of the "rational" class.
#'
#' @param e1 A rational number
#' @param e2 A rational number
#'
#' @return A new "rational" object that represents the result of the operation, simplified to its lowest terms.
#'
#' @details
#' The arithmetic operations are implemented using the following logic:
#' - Addition: Computes a common denominator and adds the numerators.
#' - Subtraction: Computes a common denominator and subtracts the numerators.
#' - Multiplication: Multiplies the numerators and denominators directly.
#' - Division: Multiplies the numerator of the first by the denominator of the second and vice versa. Division by zero is not allowed.
#'
#'
# Addition of two rational numbers
setMethod(
  "+",
  signature(e1 = "rational", e2 = "rational"),
  function(e1, e2) {
    # a/b + c/d = (a*d + c*b) / (b*d)
    cross_sum <- e1@numerator * e2@denominator + e2@numerator * e1@denominator
    common_denominator <- e1@denominator * e2@denominator
    unreduced <- createRational(cross_sum, common_denominator)
    simplify(unreduced)
  }
)
# Subtraction of two rational numbers
setMethod(
  "-",
  signature(e1 = "rational", e2 = "rational"),
  function(e1, e2) {
    # a/b - c/d = (a*d - c*b) / (b*d)
    cross_difference <- e1@numerator * e2@denominator -
      e2@numerator * e1@denominator
    common_denominator <- e1@denominator * e2@denominator
    unreduced <- createRational(cross_difference, common_denominator)
    simplify(unreduced)
  }
)
# Multiplication of two rational numbers
setMethod(
  "*",
  signature(e1 = "rational", e2 = "rational"),
  function(e1, e2) {
    # (a/b) * (c/d) = (a*c) / (b*d)
    product_numerator <- e1@numerator * e2@numerator
    product_denominator <- e1@denominator * e2@denominator
    unreduced <- createRational(product_numerator, product_denominator)
    simplify(unreduced)
  }
)
# Division of two rational numbers
setMethod(
  "/",
  signature(e1 = "rational", e2 = "rational"),
  function(e1, e2) {
    # A divisor with a zero numerator represents zero.
    if (e2@numerator == 0) {
      stop("Error: Cannot divide by zero.")
    }
    # (a/b) / (c/d) = (a*d) / (b*c)
    quotient_numerator <- e1@numerator * e2@denominator
    quotient_denominator <- e1@denominator * e2@numerator
    unreduced <- createRational(quotient_numerator, quotient_denominator)
    simplify(unreduced)
  }
)
#' Compute the Decimal Value of a Rational Number
#'
#' Computes the decimal (quotient) value of a rational number. The `digits`
#' argument only controls how the value is printed; the returned value is
#' never rounded or converted to text.
#'
#' @param r A rational number
#' @param digits An optional non-negative whole number giving the number of
#'   significant digits used when printing (it is passed to `format()`).
#'   If `NULL`, the function prints nothing itself.
#'
#' @return The quotient `numerator / denominator` as a numeric value at full
#'   precision. It is returned visibly when `digits` is `NULL`, and invisibly
#'   after the formatted value has been printed when `digits` is supplied.
#'
#' @details
#' If `digits` is specified, it must meet the following conditions:
#' 1. Be a single numeric value.
#' 2. Be finite and a whole number.
#' 3. Be non-negative.
#'
#' Any invalid `digits` argument triggers an error.
#'
# Function to compute the decimal (quotient) of a rational number
quotient <- function(r, digits = NULL) {
  value <- r@numerator / r@denominator

  # Without `digits` there is nothing special to print: return the value.
  if (is.null(digits)) {
    return(value)
  }

  # `&&` short-circuits, so the numeric comparisons only run on a single,
  # finite, numeric `digits`; trunc() is used instead of as.integer() so very
  # large values cannot turn into NA.
  digits_ok <- is.numeric(digits) && length(digits) == 1L &&
    is.finite(digits) && digits == trunc(digits) && digits >= 0
  if (!digits_ok) {
    stop("Error: 'digits' must be a non-negative integer.")
  }

  # Print the formatted value, but hand back the unrounded number invisibly so
  # the displayed precision never leaks into later calculations.
  print(format(value, digits = digits), quote = FALSE)
  invisible(value)
}
# b. Use your rational class to create three objects:
r1: 24/6, r2: 7/230, r3: 0/4
r1 <- createRational(24, 6) # Represents 24 / 6
r2 <- createRational(7, 230) # Represents 7 / 230
r3 <- createRational(0, 4) # Represents 0 / 4r14 / 1
r30 / 4
r1 + r2927 / 230
r1 - r2913 / 230
r1 * r214 / 115
r1 / r2920 / 7
r1 + r34 / 1
r1 * r30 / 1
r2 / r3Error in r2/r3: Error: Cannot divide by zero.
quotient(r1)[1] 4
quotient(r2)[1] 0.03043478
quotient(r2, digits = 3)[1] "0.0304"
quotient(r2, digits = 3.14)Error in quotient(r2, digits = 3.14): Error: 'digits' must be a non-negative integer.
quotient(r2, digits = "avocado")Error in quotient(r2, digits = "avocado"): Error: 'digits' must be a non-negative integer.
q2 <- quotient(r2, digits = 3)
q2[1] "0.0304"
quotient(r3)[1] 0
simplify(r1)4 / 1
simplify(r2)7 / 230
simplify(r3)0 / 1
c. Show that your validator does not allow the creation of rational’s with 0 denominator, and check other malformed input to your constructor.
Note that there are a lot of choices to be made here. How are you going to store the class? Two numerics? A vector of length two? A formula? A string? What are users going to pass into the constructor? A string (“24/6”)? Two arguments? A vector?
There is no right answer to those questions. Make the best decision you can, and don’t be afraid to change it if your decision causes unforeseen difficulties.
You may not use any existing R functions or packages that would trivialize this assignment. (E.g. if you found an existing package that does this, or found a function that automatically produces the quotient or simplified version, that is not able to be used.)
Hint: It may be useful to define other functions that I don’t explicitly ask for.
# Case 1: Valid numeric inputs
createRational(24, 6) # Should simplify to 4/14 / 1
# Case 2: Invalid numeric input: Zero denominator
createRational(24, 0) # Expect: "Error: Denominator cannot be zero."Error in createRational(24, 0): Error: Denominator cannot be zero.
# Case 3: Missing numerator or denominator
createRational(24) # Should throw an error due to missing arguments: "Error in createRational: argument is missing, with no default."Error in createRational(24): argument "denominator" is missing, with no default
createRational() # Should throw an error due to missing arguments: "Error in createRational: argument is missing, with no default."Error in createRational(): argument "denominator" is missing, with no default
# Case 4: Empty vector
empty_vec <- c()
createRational(empty_vec[1], empty_vec[2]) # Should throw "Error: Missing numerator or denominator."Error in if (denominator == 0) stop("Error: Denominator cannot be zero."): argument is of length zero
# Case 5: Non-numeric vector
createRational("24", "6") # Should throw "Error: Both numerator and denominator must be numeric."Error in eval(expr, envir, enclos): Not compatible with requested type: [type=character; target=integer].
createRational(24, "six") # Should throw "Error: Both numerator and denominator must be numeric."Error in eval(expr, envir, enclos): Not compatible with requested type: [type=character; target=integer].
# Case 6: Negative denominator
createRational(5, -3) # should simplify to a positive denominator-5 / 3
# Case 7: Numerator is zero
createRational(0, 10) # should simplify to 0/100 / 10
# Case 8: Division by zero rational number
r4 <- createRational(1, 2)
r5 <- createRational(0, 1) # This rational number represents 0
r4 / r5 # Expect: "Error: Cannot divide by zero."Error in r4/r5: Error: Cannot divide by zero.
# Case 9: Invalid 'digits' argument
# non-integer
quotient(createRational(1, 3), digits = "three") # Expect: "Error: 'digits' must be a non-negative integer."Error in quotient(createRational(1, 3), digits = "three"): Error: 'digits' must be a non-negative integer.
# negative integer
quotient(createRational(1, 3), digits = -1) # Expect: "Error: 'digits' must be a non-negative integer."Error in quotient(createRational(1, 3), digits = -1): Error: 'digits' must be a non-negative integer.
Problem 2
Let’s revisit the art data from the last problem set. Use plotly for these.
library(tidyverse)── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr 1.1.4 ✔ readr 2.1.5
✔ forcats 1.0.0 ✔ stringr 1.5.1
✔ ggplot2 3.5.1 ✔ tibble 3.2.1
✔ lubridate 1.9.3 ✔ tidyr 1.3.1
✔ purrr 1.0.2
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag() masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
#install.packages("plotly")
library(plotly)
Attaching package: 'plotly'
The following object is masked from 'package:ggplot2':
last_plot
The following object is masked from 'package:stats':
filter
The following object is masked from 'package:graphics':
layout
art_sales <- read_csv("~/Downloads/df_for_ml_improved_new_market.csv")Rows: 4347 Columns: 112
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (1): eventdate
dbl (111): id, case_id, year, height, width, size_inchsqr, price_usd, meanpr...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Regenerate your plot which addresses the second question from last time:
- Does the distribution of genre of sales across years appear to change?
You may copy your plot from last time, or copy my plot from the solutions, or come up with your own new plot.
# Data preparation
# Add an indicator column, Genre___Multiple, that is 1 when an artwork is
# flagged in more than one genre column and 0 otherwise.
art_sale1 <- mutate(
  art_sales,
  Genre___Multiple = as.numeric(
    Genre___Photography + Genre___Print + Genre___Sculpture +
      Genre___Painting + Genre___Others > 1
  )
)
# Reshape the genre indicator columns to long format: one row per
# (artwork, genre) pair for which the indicator equals 1.
art_genres <- art_sale1 %>%
  pivot_longer(
    cols = starts_with("Genre___"),  # every genre indicator column
    names_to = "genre",              # genre name taken from the column name
    names_prefix = "Genre___",       # strip the prefix for cleaner genre names
    values_to = "is_genre"           # 1 when the artwork belongs to the genre
  ) %>%
  filter(is_genre == 1)
art_genres# A tibble: 5,385 × 109
id case_id year height width size_inchsqr price_usd meanprice_year
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 0 57649 1997 29 24 696 4160 247.
2 0 57649 1997 29 24 696 4160 247.
3 0 57649 1997 29 24 696 4160 247.
4 1 30468 1997 17 14 238 2340 13.9
5 2 85464 1997 28 22 616 3640 26.5
6 3 27308 1997 32 39 1248 10832 18.2
7 3 27308 1997 32 39 1248 10832 18.2
8 3 27308 1997 32 39 1248 10832 18.2
9 4 82202 1997 46 37 1702 13210 5.26
10 5 60932 1997 50 43 2150 3434 30.4
# ℹ 5,375 more rows
# ℹ 101 more variables: min_price <dbl>, max_price <dbl>,
# medianprice_year <dbl>, cnt_mean <dbl>, cnt_max <dbl>, cnt_median <dbl>,
# cot_mean <dbl>, cot_max <dbl>, cot_median <dbl>, ranking <dbl>,
# fest_biennal <dbl>, private_inst <dbl>, public_inst <dbl>, solo_show <dbl>,
# group_show <dbl>, age <dbl>, estimate_min_usd <dbl>,
# estimate_max_usd <dbl>, estimate_center_usd <dbl>, …
# Static version of the plot from the last problem set (ggplot2):
# stacked bars of sale counts per year, filled by genre.
ggplot(data = art_genres, mapping = aes(x = factor(year), fill = genre)) +
  geom_bar(position = "stack", alpha = 0.75) +  # semi-transparent stacked bars
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5, face = "bold"),
    axis.text.x = element_text(angle = 45, hjust = 1)  # tilt the year labels
  ) +
  labs(
    title = "Distribution of Art Sales Genres Over Time",
    x = "Year",
    y = "Count of Sales",
    fill = "Genre"
  )
# Create a stacked bar chart with plotly
# Interactive stacked bar chart: number of sales per year, split by genre.
plotly_genre_plot <- layout(
  plot_ly(
    data = count(art_genres, year, genre),  # one row per (year, genre) with n
    x = ~factor(year),                      # year as a categorical axis
    y = ~n,
    color = ~genre,
    type = "bar",
    # Custom hover label; hoverinfo = "text" shows only this label
    text = ~paste("Genre:", genre, "<br>Year:", year, "<br>Count:", n),
    hoverinfo = "text"
  ),
  title = "Distribution of Art Sales Genres Over Time",
  barmode = "stack",
  xaxis = list(title = "Year"),
  yaxis = list(title = "Count of Sales"),
  legend = list(title = list(text = "Genre"))
)
plotly_genre_plot
This stacked bar chart shows the distribution of art sales across genres over time. Overall, the total count of sales has grown steadily, with a sharp increase around 2011–2012. Photography consistently dominates the genre distribution, contributing significantly to sales in most years. Sculpture and Print also maintain notable shares, especially during the later years. The growth in the Others and Multiple categories towards the end of the timeline suggests diversification in genres or a broader categorization of artworks in recent years. This trend reflects both increasing sales activity and shifts in genre popularity over time.
b. Generate an interactive plot with plotly that can address both of these questions from last time:
Is there a change in the sales price in USD over time?
How does the genre affect the change in sales price over time?
This should be a single interactive plot, with which a user can manipulate the view to be able to look at change over time overall, or by genre.
# Prepare data for average sales price by year and genre
avg_sales_by_genre <- art_genres %>%
  group_by(year, genre) %>%
  summarize(avg_price = mean(price_usd, na.rm = TRUE), .groups = "drop")

# Average sales price by year across all sales. This is computed from
# art_sales (one row per sale) rather than art_genres, where a sale is
# repeated once for every genre indicator it carries and would therefore be
# counted more than once.
avg_sales_overall <- art_sales %>%
  group_by(year) %>%
  summarize(avg_price = mean(price_usd, na.rm = TRUE), .groups = "drop")

# Create the interactive plot: one line per genre plus a dashed "Overall"
# line, so the same figure answers both "is there a change over time?" and
# "how does genre affect it?". Traces can be shown or hidden from the legend.
interactive_plot <- plot_ly(
  data = avg_sales_by_genre,
  x = ~year,               # Year on the x-axis
  y = ~avg_price,          # Average price on the y-axis
  color = ~genre,          # Different colors for genres
  type = "scatter",
  mode = "lines+markers",  # Lines and markers for better visibility
  text = ~paste("Genre:", genre, "<br>Year:", year, "<br>Avg Price:", round(avg_price, 2)), # Hover text
  hoverinfo = "text"       # Display only custom hover text
) %>%
  add_trace(
    data = avg_sales_overall,
    x = ~year,
    y = ~avg_price,
    name = "Overall",
    type = "scatter",
    mode = "lines+markers",
    line = list(color = "black", dash = "dash"),
    marker = list(color = "black"),
    text = ~paste("Overall", "<br>Year:", year, "<br>Avg Price:", round(avg_price, 2)),
    hoverinfo = "text",
    inherit = FALSE  # do not inherit the per-genre color mapping
  ) %>%
  layout(
    title = "Change in Sales Price Over Time by Genre",
    xaxis = list(title = "Year"),
    yaxis = list(title = "Average Price (USD)"),
    legend = list(title = list(text = "Genre"))
  )
interactive_plot
The interactive plot reveals that average sales prices across art genres have fluctuated significantly over time, with a notable peak between 2005 and 2010, followed by a decline and stabilization post-2010. Photography experienced the most dramatic spikes, particularly around 2007–2009, likely driven by high-value sales, before declining sharply. Print and Sculpture show steady growth until 2007, after which prices slightly decline or stabilize, while Painting exhibits consistent pricing trends throughout the timeline. The Multiple and Others categories display sporadic peaks but lack consistent trends. These patterns suggest a dynamic market during the mid-2000s, potentially influenced by increased demand, high-profile sales, or external economic factors.
Problem 3
Repeat problem set 4, question 1, using data.table.
library(nycflights13)
library(data.table)
Attaching package: 'data.table'
The following objects are masked from 'package:lubridate':
hour, isoweek, mday, minute, month, quarter, second, wday, week,
yday, year
The following objects are masked from 'package:dplyr':
between, first, last
The following object is masked from 'package:purrr':
transpose
Generate a table (which can just be a nicely printed tibble) reporting the mean and median departure delay per airport. Generate a second table (which again can be a nicely printed tibble) reporting the mean and median arrival delay per airport. Exclude any destination with under 10 flights. Do this exclusion through code, not manually.
Additionally,
Order both tables in descending mean delay.
Both tables should use the airport names not the airport codes.
Both tables should print all rows.
flights_dt <- as.data.table(flights)
airports_dt <- as.data.table(airports)# Calculate departure delays
# Mean and median departure delay per origin airport, labelled with the
# airport name and sorted by descending mean delay.
depart_delay <- flights_dt[
  !is.na(dep_delay),  # Exclude rows where dep_delay is NA
  .(
    mean_dep_delay = mean(dep_delay, na.rm = TRUE),     # Mean departure delay
    median_dep_delay = median(dep_delay, na.rm = TRUE), # Median departure delay
    num_flight = .N                                     # Count flights
  ),
  by = origin  # Group by origin airport
][
  num_flight >= 10  # Keep only origins with at least 10 flights
][
  # Inner join with the airport names. Without nomatch = NULL, X[Y] returns a
  # row for every airport in airports_dt, filling the delay columns with NA
  # for airports that never appear as an origin; those rows then have to be
  # stripped again afterwards.
  airports_dt, on = .(origin = faa), nomatch = NULL
][
  !is.na(name)  # Guard against airports without a recorded name
][
  order(-mean_dep_delay),  # Order by descending mean departure delay
  .(dept_name = name, mean_dep_delay, median_dep_delay)  # Select relevant columns
]
print(depart_delay) dept_name mean_dep_delay median_dep_delay
<char> <num> <num>
1: Newark Liberty Intl 15.10795 -1
2: John F Kennedy Intl 12.11216 -1
3: La Guardia 10.34688 -3
# Calculate arrival delays
# Mean and median arrival delay per destination airport, labelled with the
# airport name and sorted by descending mean delay.
arrival_delay <- flights_dt[
  !is.na(arr_delay),  # Exclude rows where arr_delay is NA
  .(
    mean_arr_delay = mean(arr_delay, na.rm = TRUE),     # Mean arrival delay
    median_arr_delay = median(arr_delay, na.rm = TRUE), # Median arrival delay
    num_flight = .N                                     # Count flights
  ),
  by = dest  # Group by destination airport
][
  num_flight >= 10  # Keep only destinations with at least 10 flights
][
  # Inner join with the airport names. Without nomatch = NULL, X[Y] returns a
  # row for every airport in airports_dt, filling the delay columns with NA
  # for airports that are not a retained destination; those rows then have to
  # be stripped again afterwards.
  airports_dt, on = .(dest = faa), nomatch = NULL
][
  !is.na(name)  # Guard against airports without a recorded name
][
  order(-mean_arr_delay),  # Order by descending mean arrival delay
  .(arr_name = name, mean_arr_delay, median_arr_delay)  # Select relevant columns
]
print(arrival_delay) arr_name mean_arr_delay median_arr_delay
<char> <num> <num>
1: Columbia Metropolitan 41.76415094 28.0
2: Tulsa Intl 33.65986395 14.0
3: Will Rogers World 30.61904762 16.0
4: Jackson Hole Airport 28.09523810 15.0
5: Mc Ghee Tyson 24.06920415 2.0
6: Dane Co Rgnl Truax Fld 20.19604317 1.0
7: Richmond Intl 20.11125320 1.0
8: Akron Canton Regional Airport 19.69833729 3.0
9: Des Moines Intl 19.00573614 0.0
10: Gerald R Ford Intl 18.18956044 1.0
11: Birmingham Intl 16.87732342 -2.0
12: Theodore Francis Green State 16.23463687 1.0
13: Greenville-Spartanburg International 15.93544304 -0.5
14: Cincinnati Northern Kentucky Intl 15.36456376 -3.0
15: Savannah Hilton Head Intl 15.12950601 -1.0
16: Manchester Regional Airport 14.78755365 -3.0
17: Eppley Afld 14.69889841 -2.0
18: Yeager 14.67164179 -1.5
19: Kansas City Intl 14.51405836 0.0
20: Albany Intl 14.39712919 -4.0
21: General Mitchell Intl 14.16722038 0.0
22: Piedmont Triad 14.11260054 -2.0
23: Washington Dulles Intl 13.86420212 -3.0
24: Cherry Capital Airport 12.96842105 -10.0
25: James M Cox Dayton Intl 12.68048606 -3.0
26: Louisville International Airport 12.66938406 -2.0
27: Chicago Midway Intl 12.36422360 -1.0
28: Sacramento Intl 12.10992908 4.0
29: Jacksonville Intl 11.84483416 -2.0
30: Nashville Intl 11.81245891 -2.0
31: Portland Intl Jetport 11.66040210 -4.0
32: Greater Rochester Intl 11.56064461 -5.0
33: Hartsfield Jackson Atlanta Intl 11.30011285 -1.0
34: Lambert St Louis Intl 11.07846451 -3.0
35: Norfolk Intl 10.94909344 -4.0
36: Baltimore Washington Intl 10.72673385 -5.0
37: Memphis Intl 10.64531435 -2.5
38: Port Columbus Intl 10.60132291 -3.0
39: Charleston Afb Intl 10.59296847 -4.0
40: Philadelphia Intl 10.12719014 -3.0
41: Raleigh Durham Intl 10.05238095 -3.0
42: Indianapolis Intl 9.94043412 -3.0
43: Charlottesville-Albemarle 9.50000000 -5.0
44: Cleveland Hopkins Intl 9.18161129 -5.0
45: Ronald Reagan Washington Natl 9.06695204 -2.0
46: Burlington Intl 8.95099602 -4.0
47: Buffalo Niagara Intl 8.94595186 -5.0
48: Syracuse Hancock Intl 8.90392501 -5.0
49: Denver Intl 8.60650021 -2.0
50: Palm Beach Intl 8.56297210 -3.0
51: Bob Hope 8.17567568 -3.0
52: Fort Lauderdale Hollywood Intl 8.08212154 -3.0
53: Bangor Intl 8.02793296 -9.0
54: Asheville Regional Airport 8.00383142 -1.0
55: Pittsburgh Intl 7.68099053 -5.0
56: Gallatin Field 7.60000000 -2.0
57: NW Arkansas Regional 7.46572581 -2.0
58: Tampa Intl 7.40852503 -4.0
59: Charlotte Douglas Intl 7.36031885 -3.0
60: Minneapolis St Paul Intl 7.27016886 -5.0
61: William P Hobby 7.17618819 -4.0
62: Bradley Intl 7.04854369 -10.0
63: San Antonio Intl 6.94537178 -9.0
64: South Bend Rgnl 6.50000000 -3.5
65: Louis Armstrong New Orleans Intl 6.49017497 -6.0
66: Key West Intl 6.35294118 7.0
67: Eagle Co Rgnl 6.30434783 -4.0
68: Austin Bergstrom Intl 6.01990875 -5.0
69: Chicago Ohare Intl 5.87661475 -8.0
70: Orlando Intl 5.45464309 -5.0
71: Detroit Metro Wayne Co 5.42996346 -7.0
72: Portland Intl 5.14157973 -5.0
73: Nantucket Mem 4.85227273 -3.0
74: Wilmington Intl 4.63551402 -7.0
75: Myrtle Beach Intl 4.60344828 -13.0
76: Albuquerque International Sunport 4.38188976 -5.5
77: George Bush Intercontinental 4.24079040 -5.0
78: Norman Y Mineta San Jose Intl 3.44817073 -7.0
79: Southwest Florida Intl 3.23814963 -5.0
80: San Diego Intl 3.13916574 -5.0
81: Sarasota Bradenton Intl 3.08243131 -5.0
82: Metropolitan Oakland Intl 3.07766990 -9.0
83: General Edward Lawrence Logan Intl 2.91439222 -9.0
84: San Francisco Intl 2.67289152 -8.0
85: Yampa Valley 2.14285714 2.0
86: Phoenix Sky Harbor Intl 2.09704733 -6.0
87: Montrose Regional Airport 1.78571429 -10.5
88: Los Angeles Intl 0.54711094 -7.0
89: Dallas Fort Worth Intl 0.32212685 -9.0
90: Miami Intl 0.29905978 -9.0
91: Mc Carran Intl 0.25772849 -8.0
92: Salt Lake City Intl 0.17625459 -8.0
93: Long Beach -0.06202723 -10.0
94: Martha\\\\'s Vineyard -0.28571429 -11.0
95: Seattle Tacoma Intl -1.09909910 -11.0
96: Honolulu Intl -1.36519258 -7.0
97: John Wayne Arpt Orange Co -7.86822660 -11.0
98: Palm Springs Intl -12.72222222 -13.5
arr_name mean_arr_delay median_arr_delay
planes_dt <- as.data.table(planes)

# Add flight speed (MPH) to flights_dt by reference, only for rows where both
# air_time and distance are present; other rows keep flight_speed as NA.
flights_dt[
  !is.na(air_time) & !is.na(distance),
  flight_speed := distance / (air_time / 60)
]

# Attach the plane details to each flight, dropping planes that have no
# matching flight (inner join on tailnum).
new_flights <- flights_dt[planes_dt, on = "tailnum", nomatch = NULL]
# Per aircraft model: average flight speed and number of joined flights;
# keep only the model with the highest average speed.
fastest_model <- new_flights[
  ,
  list(
    avg_speed = mean(flight_speed, na.rm = TRUE),  # mean speed, NAs ignored
    flights_cnt = .N                               # rows (flights) per model
  ),
  by = "model"
][
  order(-avg_speed)  # fastest model first
][
  1L  # first row = highest average speed
]
print(fastest_model) model avg_speed flights_cnt
<char> <num> <int>
1: 777-222 482.6254 4